'''
* This is the project for Brtc LlmOps Platform
* @Author Leon-liao <liaosiliang@alltman.com>
* @Description //TODO 
* @File: 7_study_blob_example.py
* @Time: 2025/10/29
* @All Rights Reserved By Brtc
'''
from typing import Iterator

from langchain_core.document_loaders import BaseBlobParser
from langchain_core.documents import Document
from langchain_core.documents.base import Blob


class CustomParser(BaseBlobParser):
    """Blob parser that emits one ``Document`` per line of the blob."""

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Lazily yield a ``Document`` for each line read from ``blob``.

        Args:
            blob: The blob to read. It is opened with ``blob.as_bytes_io()``
                and iterated line by line.

        Yields:
            One ``Document`` per line. ``page_content`` is the line exactly
            as it came off the stream, and ``metadata`` carries ``source``
            (taken from ``blob.source``) and ``line_number`` (the zero-based
            index of the line within the blob).
        """
        with blob.as_bytes_io() as stream:
            # enumerate() advances the counter on every line. The previous
            # hand-rolled counter was initialised to 0 and never incremented,
            # so every document was tagged with line_number 0.
            for line_number, raw_line in enumerate(stream):
                # NOTE(review): the stream presumably yields bytes, so this
                # relies on Document accepting/coercing bytes for
                # page_content -- confirm, or decode explicitly.
                yield Document(
                    page_content=raw_line,
                    metadata={"source": blob.source, "line_number": line_number},
                )

# Load the data: wrap the file on disk in a Blob.
blob = Blob.from_path("./test.txt")
# Parse the data: CustomParser yields one Document per line of the blob.
parser = CustomParser()

# lazy_parse returns an iterator; materialise it so it can be printed and indexed.
docs = list(parser.lazy_parse(blob))

print(docs)
print(len(docs))
# An empty input file produces no documents, and docs[0] would raise
# IndexError, so only show the first document's metadata when one exists.
if docs:
    print(docs[0].metadata)